import numpy
# Domain extent and process count.
glo = 0
ghi = 47
np = 1  # number of processes; NOTE: a plain int, not the usual numpy alias

nlev = 4
maxlev = 4
glen = ghi - glo
llen = (ghi - glo) / np  # length of the domain slab handled by each process
cutoff = 12
skin = 2
order = 4

# Per-level work arrays.  The builtin ``int`` is used as the dtype because the
# ``numpy.int`` alias was deprecated in NumPy 1.20 and removed in NumPy 1.24;
# it selects exactly the same integer dtype the alias used to select.
maxcomm = numpy.zeros(maxlev, dtype=int)
nproc = numpy.zeros(maxlev, dtype=int)
delinv = numpy.zeros(maxlev)
nmsm = numpy.zeros(maxlev, dtype=int)
nmsm_proc = numpy.zeros(maxlev, dtype=int)


def get_llo(p):
  """Return the lower coordinate of the domain slab of process ``p``."""
  return llen * p + glo


def get_lhi(p):
  """Return the upper coordinate of the domain slab of process ``p``."""
  return llen * (p + 1) + glo


def ifloor(x):
  """Return ``floor(x)`` as a Python ``int``."""
  return int(numpy.floor(x))


def iceil(x):
  """Return ``ceil(x)`` as a Python ``int``."""
  return int(numpy.ceil(x))


def get_nlo(nm, np, p):
  """Return the first index of chunk ``p`` when ``nm`` items are split into ``np`` chunks.

  Every chunk gets ``floor(nm / np)`` items and the first ``nm % np`` chunks
  get one extra.  The ``np`` parameter shadows the module-level ``np``.
  """
  return p * ifloor(nm / np) + min(p, nm % np)


def get_nhi(nm, np, p):
  """Return the last (inclusive) index of chunk ``p``: one before the next chunk's start."""
  return get_nlo(nm, np, p + 1) - 1


def grid_for_restrict(nlo, nhi, delinv, i):
  """Return ``[lo, hi]``: both bounds scaled by ``delinv[i - 1] / delinv[i]``, then widened by ``order - 1``."""
  return [nlo * delinv[i - 1] / delinv[i] - order + 1, nhi * delinv[i - 1] / delinv[i] + order - 1]


def grid_for_direct(nlo, nhi, delinv, i):
  """Return ``[nlo - ndirect, nhi + ndirect]``.

  ``delinv`` and ``i`` are accepted but unused.  ``ndirect`` is a module-level
  name assigned further down in the file, so this can only be called after
  that assignment has run.
  """
  return [nlo - ndirect, nhi + ndirect]


def grid_for_prolong(nlo, nhi, delinv, i):
  """Return ``[lo, hi]``: both bounds widened by ``order - 1``, then scaled by ``delinv[i + 1] / delinv[i]``."""
  return [(nlo - order + 1) * delinv[i + 1] / delinv[i], (nhi + order - 1) * delinv[i + 1] / delinv[i]]

# Level i holds 2 ** (nlev - i) grid points, clamped to at least one point
# (a negative exponent truncates to 0).  delinv[i] is that count divided by
# the global length.
for i in range(maxlev):
  nmsm[i] = max(int(2 ** (nlev - i)), 1)
  delinv[i] = nmsm[i] / glen

# ndirect: twice the cutoff scaled by the level-0 delinv, floored.
ndirect = ifloor(2 * cutoff * delinv[0])
# halo: the larger of the order and ndirect.
halo = max(order, ndirect)


def upper_level(lo, hi, delinv, i):
  """Map ``[lo, hi]`` on level ``i`` to an index range on level ``i + 1``.

  Both bounds are widened by ``order - 1`` and scaled by
  ``delinv[i + 1] / delinv[i]``; the lower bound is rounded up and the upper
  bound is rounded down.
  """
  first = (lo - order + 1) * delinv[i + 1] / delinv[i]
  last = (hi + order - 1) * delinv[i + 1] / delinv[i]
  return [iceil(first), ifloor(last)]


def current_level(lo, hi):
  """Return ``[lo, hi]`` widened by ``ndirect`` on both sides."""
  return [lo - ndirect, hi + ndirect]
# Index-range tables, each addressed as table[level][process] and initialised to 0:
#   qout_*  - output range of anterpolation/restriction; also serves as "ein",
#             the input range of prolongation
#   qin_*   - also serves as "eout", the range used for the direct-step output
#   local_* - the range corresponding to the process itself
#             (presumably the cells it owns - confirm)
qout_lo, qout_hi, qin_lo, qin_hi, local_lo, local_hi = (
  [[0] * np for _ in range(maxlev)] for _ in range(6)
)
for p in range(np):
  # Slab of process p mapped onto the level-0 grid.
  local_lo[0][p] = ifloor((get_llo(p) - glo) * delinv[0])
  local_hi[0][p] = ifloor((get_lhi(p) - glo) * delinv[0]) - 1
  # Same slab widened by half the skin and padded with the halo; on level 0
  # qin starts out identical to qout.
  qout_lo[0][p] = qin_lo[0][p] = ifloor((get_llo(p) - skin * 0.5 - glo) * delinv[0]) - halo
  qout_hi[0][p] = qin_hi[0][p] = ifloor((get_lhi(p) + skin * 0.5 - glo) * delinv[0]) + halo - 1

# Lower limit on grid points per process: a level with nmsm points is spread
# over floor(nmsm / MIN_NMSM_PROC) processes, at least 1 and at most np.
MIN_NMSM_PROC = 4
nproc = [np]
for i in range(1, maxlev):
  nproc.append(min(np, max(1, ifloor(nmsm[i] / MIN_NMSM_PROC))))
  # Processes active on level i: split its grid points into contiguous chunks
  # and widen each chunk by ndirect to get the qin range.
  for p in range(nproc[i]):
    local_lo[i][p] = get_nlo(nmsm[i], nproc[i], p)
    local_hi[i][p] = get_nhi(nmsm[i], nproc[i], p)
    qin_lo[i][p], qin_hi[i][p] = current_level(local_lo[i][p], local_hi[i][p])
  # Processes active on the level below: project their local range upwards to
  # get the range they output on level i.
  below = i - 1
  for p in range(nproc[below]):
    qout_lo[i][p], qout_hi[i][p] = upper_level(local_lo[below][p], local_hi[below][p], delinv, below)
# Trailing extra entry, so that nproc[-1] (reached as nproc[i - 1] with
# i == 0 further down) evaluates to np.
nproc.append(np)
# Dump the three range tables, one line of (lo, hi) pairs per level.
for title, lo_table, hi_table in (
  ("qin:", qin_lo, qin_hi),
  ("qout:", qout_lo, qout_hi),
  ("local:", local_lo, local_hi),
):
  print(title)
  for lo_row, hi_row in zip(lo_table, hi_table):
    print(list(zip(lo_row, hi_row)))
def generate_comm_schedule(p, recv_lo, recv_hi, send_lo, send_hi, nsend, nrecv, nmsm):
  """Build the send list and the receive list of process ``p`` for one exchange.

  Ranges are inclusive ``[lo, hi]`` index pairs.  A receive range may be
  shifted by a whole number ``delta`` of periods ``nmsm`` before it is
  intersected with a send range.

  Parameters:
    p: index of the process the schedule is generated for.
    recv_lo, recv_hi: per-receiver bounds of the range each receiver wants.
    send_lo, send_hi: per-sender bounds of the range each sender holds.
    nsend: number of senders; ``p`` sends only if ``p < nsend``.
    nrecv: number of receivers; ``p`` receives only if ``p < nrecv``.
    nmsm: period used to shift ranges.

  Returns:
    ``(sends, recvs)``, two lists of ``(peer, lo, hi, delta)`` tuples.  In
    ``sends`` the bounds are expressed in the coordinates of the send range of
    ``p``; in ``recvs`` they are expressed in the coordinates of the receive
    range of ``p``.
  """
  import math

  # Extremes over BOTH bound lists.  min(list_a, list_b) would compare the two
  # lists lexicographically and could overlook the true extreme, which in turn
  # drops shifts that have to be examined.
  lowest = min(min(recv_lo), min(send_lo))
  highest = max(max(recv_hi), max(send_hi))
  deltas = range(math.floor(lowest / nmsm), math.ceil(highest / nmsm) + 1)

  sends = []
  if p < nsend:
    for j in range(nrecv):
      for delta in deltas:
        shift = delta * nmsm
        if recv_hi[j] + shift >= send_lo[p] and recv_lo[j] + shift <= send_hi[p]:
          sends.append((j, max(recv_lo[j] + shift, send_lo[p]), min(recv_hi[j] + shift, send_hi[p]), delta))

  recvs = []
  if p < nrecv:
    for j in range(nsend):
      for delta in deltas:
        shift = delta * nmsm
        # Same inclusive overlap test as on the sending side (seen from the
        # receiver), so every message that is sent is also expected.
        if recv_hi[p] >= send_lo[j] - shift and recv_lo[p] <= send_hi[j] - shift:
          recvs.append((j, max(recv_lo[p], send_lo[j] - shift), min(recv_hi[p], send_hi[j] - shift), delta))
  return sends, recvs
def strsched(sched, prefix, connector):
  """Format a schedule as one line of text.

  Each entry ``(peer, lo, hi, delta)`` is rendered as
  ``[lo:hi]<connector>peer(delta)``; the entries are joined with ``", "`` and
  appended directly to ``prefix``.  An empty schedule gives ``""``.
  """
  if not sched:
    return ""
  rendered = (
    "[%d:%d]%s%d(%d)" % (entry[1], entry[2], connector, entry[0], entry[3])
    for entry in sched
  )
  return prefix + ", ".join(rendered)
# Print, for every process, the sequence of steps and exchanges it performs:
# first an upward pass over the levels (qgrid), then a downward pass (egrid).
# eout shares the qin tables (same list objects).
eout_lo = qin_lo
eout_hi = qin_hi
for p in range(np):
  # Upward pass: level 0 first, then one restriction per level.
  for i in range(maxlev):
    if i == 0:
      print('%d: generage qgrid0[%d:%d]' % (p, qout_lo[i][p], qout_hi[i][p]))
    # Senders hold the qout range, receivers want their local range.  For
    # i == 0 the sender count is nproc[-1], the last entry of nproc.
    sends, recvs = generate_comm_schedule(p, local_lo[i], local_hi[i], qout_lo[i], qout_hi[i], nproc[i - 1], nproc[i], nmsm[i])
    print(strsched(sends, "%d: send qgrid%d" % (p, i), '->'))
    print(strsched(recvs, "%d: recv+ qgrid%d" % (p, i), '<-'))
    print("%d: now holds all local qgrid" % p)
    if i < maxlev - 1 and p < nproc[i]:
      print('%d: restriction qgrid%d[%d:%d] from qgrid%d[%d:%d]' % (p, i + 1, qout_lo[i + 1][p], qout_hi[i + 1][p], i, local_lo[i][p], local_hi[i][p]))
  print('%d: end of restriction' % p)
  # Downward pass: coarsest level first.
  for i in range(maxlev - 1, -1, -1):
    # Senders hold their local range, receivers want the qin range.
    sends, recvs = generate_comm_schedule(p, qin_lo[i], qin_hi[i], local_lo[i], local_hi[i], nproc[i], nproc[i], nmsm[i])
    print(strsched(sends, "%d: send qgrid%d" % (p, i), '->'))
    print(strsched(recvs, "%d: recv qgrid%d" % (p, i), '<-'))
    # The source range is the qin range just received: [qin_lo:qin_hi].
    # (It used to be printed as [qin_lo:qout_lo], two lower bounds taken from
    # different tables.)
    print('%d: direct egrid%d[%d:%d] from qgrid%d[%d:%d]' % (p, i, eout_lo[i][p], eout_hi[i][p], i, qin_lo[i][p], qin_hi[i][p]))
    # Senders hold the qin (= eout) range, receivers want their local range.
    sends, recvs = generate_comm_schedule(p, local_lo[i], local_hi[i], qin_lo[i], qin_hi[i], nproc[i], nproc[i - 1], nmsm[i])
    print(strsched(sends, "%d: send egrid%d" % (p, i), '->'))
    print(strsched(recvs, "%d: recv+ egrid%d" % (p, i), '<-'))
    if i > 0:
      print('%d: prolong egrid%d[%d:%d]' % (p, i - 1, qout_lo[i - 1][p], qout_hi[i - 1][p]))
    else:
      print('%d: interop egrid%d[%d:%d]' % (p, i, qout_lo[i][p], qout_hi[i][p]))
